**********************************************************
*This file builds vars on graduation rates from the IPEDS*
**********************************************************

*** create the deltas
/* Matching with the locations file is more complicated here because some universities report data for past
cohorts in years in which they were not present in IPEDS. Those years are assigned the location of the
previous/next observation for that university, and universities that never appear in the locations file are dropped. */

* Attach a location (czone, ourRatio) to every unitid-yearGroup row of the reshaped graduation data.
* Rows that match the locations file directly are set aside; rows that do not match borrow the
* location of the closest matched yearGroup of the same unitid (previous first, then next).
use "$clean_data_education/graduationRatesReshaped.dta", clear
merge 1:m unitid yearGroup using "$clean_data_education/locations.dta", keep(1 3) // we need locations only for the obs that are available
	* drop non-mainland institutions and yearGroup 1990 rows without a czone
	* (original note: some early obs have details for enrollment but not on institution details)
	drop if nonMainland == 1 | (yearGroup == 1990 & mi(czone))

	* set the directly matched rows aside in a Stata-managed temporary file, so nothing is left
	* behind in the clean-data folder if the script stops before the append below
	tempfile matched
	preserve
		keep if _merge == 3
		drop _merge
		save "`matched'"
	restore

	* a unitid whose czone is missing in every row never appears in the locations file
	sort unitid yearGroup
	egen index = max(czone), by(unitid)
	gen neverMatched = (index == .)
	drop if neverMatched == 1 // these obs are never present in the years we use for the other files

	* closestYear = own yearGroup for matched rows; unmatched rows first inherit the previous
	* matched yearGroup (ascending pass), and any still missing inherit the next one (descending pass)
	gen closestYear = yearGroup if !mi(czone)
	sort unitid yearGroup ourRatio
		by unitid: replace closestYear = closestYear[_n-1] if mi(closestYear)
	gsort unitid -yearGroup
		by unitid: replace closestYear = closestYear[_n-1] if mi(closestYear)

	* re-key the unmatched rows on the borrowed year, keeping the true yearGroup for later
	gen yearGroupOriginal = yearGroup
	replace yearGroup = closestYear
	keep if _merge == 1
	* czone and ourRatio are dropped so that the merge below brings them in from the locations file
	drop index neverMatched closestYear czone ourRatio _merge

	* assert(2 3): the merge stops with an error if any re-keyed row still fails to match
	* NOTE(review): 1:m needs unitid yearGroup to be unique in memory after re-keying; two unmatched
	* years of one unitid that borrow the same closest year would make merge stop - confirm this cannot occur
	merge 1:m unitid yearGroup using "$clean_data_education/locations.dta", assert(2 3)
		drop if _merge == 2
		drop _merge
		replace yearGroup = yearGroupOriginal
		drop yearGroupOriginal

	* put the directly matched rows back
	append using "`matched'"

* Bring in the institution-level controls (only institutions found in both files are kept),
* then apply the sample restrictions in a single pass:
*   - yearGroup 1990 rows
*   - for-profit institutions
*   - institutions with freshmenFullTimeAvg below 50
*   - institutions with periods below 3
* Missing values compare as larger than any number, so rows with a missing
* freshmenFullTimeAvg or periods are retained by the inequality tests.
merge m:1 unitid using "$clean_data_education/institutionControls.dta", keep(match) nogenerate

drop if yearGroup == 1990 | forProfit == 1 | freshmenFullTimeAvg < 50 | periods < 3


* Discard the breakdown variables that are not used below
drop men* women* hispanic* white* blac*

* Weighting: scale every total* variable by ourRatio
foreach countVar of varlist total* {
	replace `countVar' = ourRatio * `countVar'
}


** collapse to obtain aggregated values
**** CZ
* collapse replaces variable labels, so each label is stored in a local first
* (falling back to the variable name when there is no label) and restored afterwards.
foreach v of varlist * {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
}

* Sum the weighted totals within czone x yearGroup.
* NOTE(review): collapse (sum) treats missing values as zero, so a cell whose inputs are all
* missing comes out as 0 rather than missing - confirm this is intended for the rates below.
collapse (sum) total*, by(czone yearGroup)

* Restore the labels; compound quotes keep labels that contain a double quote intact,
* matching the compound-quote comparison used when the labels were stored.
foreach v of varlist * {
	label variable `v' `"`l`v''"'
}
		
* rates: completers over adjusted cohort, for each of the three suffixes.
* The division already returns missing when either term is missing or the denominator is zero.
* A czone-yearGroup row whose rate exceeds 1 is removed entirely before the next rate is built.
foreach suffix in _4y _2y _L2y {
	gen Gr_total`suffix' = total_comp150`suffix' / total_adjCoh`suffix'
	drop if Gr_total`suffix' > 1 & Gr_total`suffix' < .
}

* levels: natural logs of the _2y cohort and completer totals
foreach level in total_adjCoh_2y total_comp150_2y {
	gen ln_`level' = ln(`level')
}

* delta: consecutive period index 1-4 for the four yearGroups used; any other yearGroup gets missing
sort czone yearGroup
gen d = cond(yearGroup == 1994, 1, cond(yearGroup == 2000, 2, cond(yearGroup == 2007, 3, cond(yearGroup == 2014, 4, .))))

* Period-to-period changes (times 100) of the rates and log levels, and start-of-period log levels.
* Everything is computed with the lag operator under xtset czone d, so a lag is taken only from
* the same czone and the immediately preceding period; if that row does not exist (for example
* because it was dropped for a rate above 1) the result is missing instead of a value borrowed
* from a neighbouring row.
xtset czone d

foreach v of varlist Gr_* ln_total_adjCoh_2y ln_total_comp150_2y {
	* the delta carries the label of its source variable (the variable name when unlabelled)
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
	gen d`v' = (`v' - L.`v')*100
	label variable d`v' `"`l`v''"'
}
***

* Replace each log level by its value in the previous period (kept only for yearGroup 2000, 2007, 2014).
* The previous version read the preceding row of the sorted data with no check that it
* belonged to the same czone or to the adjacent period.
foreach v in ln_total_adjCoh_2y ln_total_comp150_2y {
	gen temp = L.`v' if inlist(yearGroup, 2000, 2007, 2014)
	drop `v'
	rename temp `v'
}

xtset, clear
drop d Gr*

sort czone yearGroup

* Time variable needed by Ben's code: yearGroup 2000 -> 1990, 2007 -> 2000, 2014 -> 2008.
* Rows from any other yearGroup get a missing year and are not kept.
gen year = cond(yearGroup == 2000, 1990, cond(yearGroup == 2007, 2000, cond(yearGroup == 2014, 2008, .)))
keep if !mi(year)

* write the final CZ-level outcome file
save "$final_data_outcomes/IPEDS_GraduationCZ.dta", replace
